# Setting up the environment, load the necessary packages
library(actuar)
library(dplyr)
library(EnvStats)
library(fitdistrplus)
library(kableExtra)
library(knitr)
library(ggplot2)
library(goftest)
library(MASS)
library(readxl)
library(tibble)
library(tidyr)
# Load all prepared datasets (each .RData restores its objects into the
# global environment).
for (dataset in c("census", "emissions", "hazards",
                  "inflation", "rates", "prop_dist")) {
  load(file = paste0(dataset, ".RData"), envir = .GlobalEnv)
}
# Keep only hazards that caused property damage, ordered chronologically.
hazards <- hazards %>%
  filter(Property.Damage != 0) %>%
  arrange(Year)
# Annual frequency: number of impacting hazard events per year.
frequency <- hazards %>%
  group_by(Year) %>%
  summarise(events.pa = n())
options(scipen=999) # suppress scientific notation in printed results
set.seed(123) # reproducible Monte Carlo draws
n_years <- 10 # feel free to adjust: e.g. if you want aggregate loss in 5 years, put n_years <- 5
n_iterations <- 100000 # number of Monte_Carlo simulations: also feel free to adjust.
# Hard-coded negative-binomial frequency parameters (size, mu) and
# log-gamma severity parameters (shapelog, ratelog) — presumably from a
# fit performed elsewhere; TODO confirm the source of these estimates.
size <- 1.948557
mu <- 49.704918
shapelog <- 14.829296
ratelog <- 1.413602
# Simulate the aggregate loss over n_years, n_iterations times.
# BUG FIX: the original nested loops both used `i`, so the inner loop
# clobbered the iteration index, redrew freq_vec every pass, and kept only
# the final year's severity — each element of `sev` was a single year's
# loss, not an n_years aggregate (hence the sanity ratio ~0.061 instead
# of the expected n_years/60 ≈ 0.167).
sev <- numeric(n_iterations) # preallocate instead of growing with append()
for (iter in seq_len(n_iterations)) {
  # One annual event count per year from the fitted negative binomial.
  freq_vec <- rnbinom(n_years, size = size, mu = mu)
  # One log-gamma severity per event, summed over all years.
  sev[iter] <- sum(rlgamma(sum(freq_vec), shapelog = shapelog, ratelog = ratelog))
}
# sanity check: mean aggregate loss in n years / cumulative historical property damages: should be about = n_years / 60
(mean(sev)/sum(hazards$Property.Damage) )
## [1] 0.06110882
# Previously we got 0.06579576. For n_years = 10, this should be about 0.1667
# Possible reason for small value:
# the historical losses include some years where total losses are extreme, which isn't captured by just taking mean(sev)
# Histogram of the simulated aggregate losses. Axes are truncated for
# readability; xlim/ylim drop (rather than zoom past) out-of-range bars.
sev_df <- data.frame(sev = sev)
ggplot(sev_df, aes(x = sev)) +
  geom_histogram(bins = 100) +
  xlim(c(0, 1000000000)) +
  ylim(c(0, 15000)) +
  labs(
    title = "Aggregate Loss across Instances",
    x = "Aggregate Loss in N Years",
    y = "Sample"
  )
# Per-region annual event counts (freq1..freq6) and log property damages
# (lsev1..lsev6), built in a loop instead of six copy-pasted pipelines.
for (reg in 1:6) {
  region_rows <- hazards %>% filter(Region == reg)
  assign(paste0("freq", reg),
         region_rows %>% group_by(Year) %>% summarise(events.pa = n()))
  assign(paste0("lsev", reg),
         region_rows %>% mutate(Property.Damage = log(Property.Damage)))
}
# Fit a negative binomial to each region's historical annual frequency
# (nbin1..nbin6).
for (reg in 1:6) {
  assign(paste0("nbin", reg),
         fitdist(get(paste0("freq", reg))$events.pa, "nbinom"))
}
# check fit using qq plots
# QQ plots look good — the negative-binomial fits are adequate.
# but frequency per year will increase because of climate change
# Multiply the fitted mu by each risk-adjustment factor (RAF) to get a
# table of scenario-dependent frequency means per region.
scale_raf <- function(raf_tbl, mu_hat) {
  raf_tbl %>% mutate(
    SSP1..2.6 = SSP1..2.6 * mu_hat,
    SSP2.3.4 = SSP2.3.4 * mu_hat,
    SSP3.6.0 = SSP3.6.0 * mu_hat,
    SSP5.Baseline = SSP5.Baseline * mu_hat,
    .keep = "unused"
  )
}
for (reg in 1:6) {
  assign(paste0("freq_RAF", reg),
         scale_raf(emissions, get(paste0("nbin", reg))$estimate[[2]]))
}
# Fit severity to a log-gamma distribution: a gamma fitted to the log
# damages (gamm1..gamm6).
for (reg in 1:6) {
  assign(paste0("gamm", reg),
         fitdist(get(paste0("lsev", reg))$Property.Damage, "gamma"))
}
n_iterations <- 10000 # number of Monte Carlo simulations
# One simulated year of aggregate loss per iteration for a single region:
# draw an annual event count from the fitted negative binomial, then sum
# that many log-gamma severities (gamma on the log scale, exp back).
simulate_region_year <- function(nbin_fit, gamm_fit, n_sims) {
  draws <- numeric(n_sims)
  for (j in seq_len(n_sims)) {
    n_events <- rnbinom(1, size = nbin_fit$estimate[[1]], mu = nbin_fit$estimate[[2]])
    draws[j] <- sum(exp(rgamma(n_events, shape = gamm_fit$estimate[[1]],
                               rate = gamm_fit$estimate[[2]])))
  }
  draws
}
# Fill sev1..sev6 (same RNG call order as the original six loops).
for (reg in 1:6) {
  assign(paste0("sev", reg),
         simulate_region_year(get(paste0("nbin", reg)),
                              get(paste0("gamm", reg)),
                              n_iterations))
}
# Convert each simulated-loss vector to a one-column data frame
# (sev1_df..sev6_df, column named after the vector).
for (reg in 1:6) {
  sev_wrap <- data.frame(get(paste0("sev", reg)))
  names(sev_wrap) <- paste0("sev", reg)
  assign(paste0("sev", reg, "_df"), sev_wrap)
}
# such that we have the aggregate loss of each region for the following year as below
# Percentiles: help compute VaR
perc <- c(0.25, 0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 0.98, 0.99, 0.995, 0.999)
Percentile <- c(perc, "Mean", "SD", "Max")
# Quantiles plus summary moments for one vector of simulated losses.
loss_summary <- function(x, probs) {
  c(quantile(x, probs = probs), mean(x), sd(x), max(x))
}
gross1 <- loss_summary(sev1, perc)
gross2 <- loss_summary(sev2, perc)
gross3 <- loss_summary(sev3, perc)
gross4 <- loss_summary(sev4, perc)
gross5 <- loss_summary(sev5, perc)
gross6 <- loss_summary(sev6, perc)
results.table <- data.frame(Percentile, gross1, gross2, gross3, gross4, gross5, gross6)
# gross stands for the possible loss at the respective percentile
kable(results.table) %>%
  kable_styling(full_width = F)
| Percentile | gross1 | gross2 | gross3 | gross4 | gross5 | gross6 |
|---|---|---|---|---|---|---|
| 0.25 | 771576.3 | 718714.2 | 443846.3 | 269850.7 | 141469.1 | 72640.08 |
| 0.5 | 3987297.7 | 4856075.9 | 3426386.3 | 1436337.3 | 948797.0 | 561336.72 |
| 0.75 | 18257501.2 | 22071997.9 | 18877478.1 | 6551006.8 | 4679887.4 | 3792230.23 |
| 0.8 | 26925007.0 | 30607123.6 | 28112198.9 | 9401245.0 | 6739647.3 | 6190111.53 |
| 0.85 | 45142458.0 | 45622136.8 | 45743680.9 | 14713714.1 | 10858227.3 | 11298922.94 |
| 0.9 | 86112377.5 | 77176865.7 | 84903025.7 | 25475294.9 | 19459361.5 | 23162083.99 |
| 0.95 | 242350621.9 | 181775554.4 | 227029497.3 | 60111612.4 | 54253928.1 | 69771423.59 |
| 0.98 | 852990634.1 | 504828181.8 | 704316318.7 | 216839921.7 | 193619072.3 | 311120070.60 |
| 0.99 | 2042810730.9 | 992463035.0 | 1596079613.4 | 512834674.1 | 511077369.7 | 969534745.57 |
| 0.995 | 5385551358.0 | 2413655738.3 | 5029179009.2 | 1023055975.5 | 1285980236.1 | 2314848111.77 |
| 0.999 | 38322605077.1 | 13295819215.7 | 22734842542.4 | 8286855343.2 | 7329127943.9 | 16392062142.30 |
| Mean | 5044036197.1 | 142664288.5 | 231297676.3 | 44281321.4 | 45027481.1 | 91931763.45 |
| SD | 482733286024.4 | 3902624225.4 | 8431638476.9 | 693474831.4 | 781354417.5 | 1933813793.56 |
| Max | 48271245840794.7 | 336517588734.2 | 801944277209.3 | 36685482548.7 | 42054941577.4 | 121258034304.42 |
# if we look at the historical property damage (total per year)
hazards_sorted <- hazards %>% group_by(Year) %>% summarise(sum(Property.Damage))
head(hazards_sorted, 10)
## # A tibble: 10 × 2
##     Year `sum(Property.Damage)`
##    <int>                  <dbl>
##  1  1960                 303291
##  2  1961                 707416
##  3  1962                 238398
##  4  1963                1795433
##  5  1964                4623976
##  6  1965                2272604
##  7  1966                2473392
##  8  1967                2461031
##  9  1968                2225197
## 10  1969                1920886
# seems to underestimate aggregate loss
# may need to either exclude older years, or adjust damages for inflation
# formatting the data for use: index the inflation factors by Year
inflation <- as.data.frame(inflation) %>%
  remove_rownames() %>%
  column_to_rownames(var = "Year")
# Data of inflation starts from 1962, we omit data before year 1962
hazards_adj <- hazards %>%
  dplyr::filter(Year >= 1962)
# Inflate each historical damage to present value with its year's factor.
# Vectorized match() replaces the original row-by-row loop (which scanned
# the rownames once per hazard, O(n*m)); values are identical whenever
# every year is present in the inflation table.
infl_factor <- inflation$FV[match(as.character(hazards_adj$Year), rownames(inflation))]
hazards_adj$Property.Damage <- hazards_adj$Property.Damage * infl_factor
hazards <- hazards_adj
# Rebuild per-region annual frequencies (freq1..freq6) and log damages
# (lsev1..lsev6) from the inflation-adjusted hazards.
for (reg in 1:6) {
  region_rows <- hazards %>% filter(Region == reg)
  assign(paste0("freq", reg),
         region_rows %>% group_by(Year) %>% summarise(events.pa = n()))
  assign(paste0("lsev", reg),
         region_rows %>% mutate(Property.Damage = log(Property.Damage)))
}
# Refit the per-region negative binomial frequencies (nbin1..nbin6) on
# the inflation-adjusted data.
for (reg in 1:6) {
  assign(paste0("nbin", reg),
         fitdist(get(paste0("freq", reg))$events.pa, "nbinom"))
}
# check fit using qq plots
# QQ plots again look good after the inflation adjustment.
# Multiply the refitted mu parameter by each RAF to get scenario-dependent
# frequency means per region (freq_RAF1..freq_RAF6).
scale_raf <- function(raf_tbl, mu_hat) {
  raf_tbl %>% mutate(
    SSP1..2.6 = SSP1..2.6 * mu_hat,
    SSP2.3.4 = SSP2.3.4 * mu_hat,
    SSP3.6.0 = SSP3.6.0 * mu_hat,
    SSP5.Baseline = SSP5.Baseline * mu_hat,
    .keep = "unused"
  )
}
for (reg in 1:6) {
  assign(paste0("freq_RAF", reg),
         scale_raf(emissions, get(paste0("nbin", reg))$estimate[[2]]))
}
# Similarly refit the log-gamma severity per region (gamm1..gamm6): a
# gamma on the log damages.
for (reg in 1:6) {
  assign(paste0("gamm", reg),
         fitdist(get(paste0("lsev", reg))$Property.Damage, "gamma"))
}
n_iterations <- 10000 # number of Monte Carlo simulations
# Per-region annual loss simulation on the inflation-adjusted fits;
# sev_agg accumulates the across-region total, aligned by iteration index.
sev_agg <- numeric(n_iterations)
for (reg in 1:6) {
  nbin_fit <- get(paste0("nbin", reg))
  gamm_fit <- get(paste0("gamm", reg))
  draws <- numeric(n_iterations)
  for (j in seq_len(n_iterations)) {
    n_events <- rnbinom(1, size = nbin_fit$estimate[[1]], mu = nbin_fit$estimate[[2]])
    draws[j] <- sum(exp(rgamma(n_events, shape = gamm_fit$estimate[[1]],
                               rate = gamm_fit$estimate[[2]])))
  }
  assign(paste0("sev", reg), draws)
  sev_agg <- sev_agg + draws
}
# Wrap each region's simulated losses (and the aggregate) in data frames.
for (reg in 1:6) {
  sev_wrap <- data.frame(get(paste0("sev", reg)))
  names(sev_wrap) <- paste0("sev", reg)
  assign(paste0("sev", reg, "_df"), sev_wrap)
}
agg_df <- data.frame(sev_agg)
# such that we have the aggregate loss of each region for the following year as below
# Percentiles: help compute VaR
perc <- c(0.25, 0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 0.98, 0.99, 0.995, 0.999)
Percentile <- c(perc, "Mean", "SD", "Max")
# Quantiles plus summary moments for one vector of simulated losses.
loss_summary <- function(x, probs) {
  c(quantile(x, probs = probs), mean(x), sd(x), max(x))
}
gross1 <- loss_summary(sev1, perc)
gross2 <- loss_summary(sev2, perc)
gross3 <- loss_summary(sev3, perc)
gross4 <- loss_summary(sev4, perc)
gross5 <- loss_summary(sev5, perc)
gross6 <- loss_summary(sev6, perc)
results.table <- data.frame(Percentile, gross1, gross2, gross3, gross4, gross5, gross6)
# gross stands for the possible loss at the respective percentile
kable(results.table) %>%
  kable_styling(full_width = F)
| Percentile | gross1 | gross2 | gross3 | gross4 | gross5 | gross6 |
|---|---|---|---|---|---|---|
| 0.25 | 1629122 | 1503668 | 695254.4 | 550895.2 | 333468.1 | 194038.8 |
| 0.5 | 7600693 | 8136864 | 5740506.6 | 2889949.3 | 1945553.3 | 1267204.2 |
| 0.75 | 31813657 | 33134941 | 31357535.1 | 11845188.0 | 9045598.4 | 6506298.2 |
| 0.8 | 45956634 | 45097667 | 46461285.3 | 16682941.2 | 12724498.7 | 9839231.7 |
| 0.85 | 69700881 | 66018839 | 73337046.9 | 25505572.0 | 19335621.8 | 15808223.5 |
| 0.9 | 118107966 | 107021259 | 132789273.4 | 42391769.1 | 35546309.5 | 31989814.9 |
| 0.95 | 299098477 | 238394085 | 314568412.3 | 92490508.8 | 85565374.3 | 84745089.9 |
| 0.98 | 969317650 | 638104307 | 960090973.5 | 256521679.0 | 258138884.0 | 327637895.6 |
| 0.99 | 2371394939 | 1247288513 | 2349725311.5 | 524540397.6 | 495050473.1 | 788602780.7 |
| 0.995 | 5249535516 | 2209462685 | 4772229097.5 | 1228020776.6 | 757472154.1 | 2059710239.8 |
| 0.999 | 40054422890 | 20203226080 | 32832237724.3 | 9355287842.4 | 2735246621.5 | 16217121377.4 |
| Mean | 1203891656 | 258056118 | 634654833.7 | 60393136.2 | 32302208.9 | 91630071.6 |
| SD | 99217385328 | 8705770893 | 30530976237.4 | 1149636007.6 | 375785525.7 | 1956144024.0 |
| Max | 9915532671009 | 704147416551 | 2766200940720.2 | 77977112578.9 | 26162819845.3 | 145163464873.0 |
# if we look at the historical property damage (now inflation-adjusted)
hazards_sorted <- hazards %>% group_by(Year) %>% summarise(sum(Property.Damage))
head(hazards_sorted, 10)
## # A tibble: 10 × 2
##     Year `sum(Property.Damage)`
##    <int>                  <dbl>
##  1  1962               1999380.
##  2  1963              14743771.
##  3  1964              37223070.
##  4  1965              17926990.
##  5  1966              19027567.
##  6  1967              18327662.
##  7  1968              16129425.
##  8  1969              13455368.
##  9  1970              20713614.
## 10  1971              31066560.
# comparing the empirical and projected aggregate loss per region
# Empirical log damages (emp_df1..emp_df6) and log simulated losses
# (proj_df1..proj_df6), labelled for side-by-side plotting.
for (reg in 1:6) {
  assign(paste0("emp_df", reg),
         data.frame(group = "emp",
                    value = get(paste0("lsev", reg))$Property.Damage))
  assign(paste0("proj_df", reg),
         data.frame(group = "proj",
                    value = log(get(paste0("sev", reg, "_df"))[[paste0("sev", reg)]])))
}
# Overall yearly totals, on the log scale for comparability.
emp_df <- data.frame(group = "emp", value = log(hazards_sorted$`sum(Property.Damage)`))
proj_df <- data.frame(group = "proj", value = log(agg_df$sev_agg))
# Still seems to underestimate aggregate loss; may need to either exclude
# older years or adjust damages for inflation.
# Assumptions:
# - Properties are equally likely to be damaged.
# - Material and labour cost increases follow a uniform distribution ~ U(0, 0.5).
# - The cost of replacing household goods follows a uniform distribution ~ U(0.4, 0.75).
# Set up parameters
n_iterations <- 10000 # number of Monte Carlo simulations
set.seed(123)
mat_increase <- runif(n_iterations, 0, 0.5)   # material/labour cost uplift
hh_cost <- runif(n_iterations, 0.4, 0.75)     # household-goods replacement factor
# Regions 5 and 6 are omitted: low-risk regions that would not relocate
# under our program design. loss_agg accumulates the across-region total.
loss_agg <- numeric(n_iterations)
for (reg in 1:4) {
  nbin_fit <- get(paste0("nbin", reg))
  gamm_fit <- get(paste0("gamm", reg))
  losses <- numeric(n_iterations)
  for (j in seq_len(n_iterations)) {
    n_events <- rnbinom(1, size = nbin_fit$estimate[[1]], mu = nbin_fit$estimate[[2]])
    sev_pa <- sum(exp(rgamma(n_events, shape = gamm_fit$estimate[[1]],
                             rate = gamm_fit$estimate[[2]])))
    # Scale the simulated damage by cost uplifts and the region's share of
    # relocating properties (prop_dist[reg, 1]).
    losses[j] <- sev_pa * (1 + mat_increase[j]) * (1 + hh_cost[j]) * prop_dist[reg, 1]
  }
  assign(paste0("loss", reg), losses)
  loss_agg <- loss_agg + losses
}
# VaR-style summary of the per-region program losses.
perc <- c(0.25, 0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 0.98, 0.99, 0.995, 0.999)
Percentile <- c(perc, "Mean", "SD", "Max")
# Quantiles plus summary moments for one vector of simulated losses.
loss_summary <- function(x, probs) {
  c(quantile(x, probs = probs), mean(x), sd(x), max(x))
}
gross1 <- loss_summary(loss1, perc)
gross2 <- loss_summary(loss2, perc)
gross3 <- loss_summary(loss3, perc)
gross4 <- loss_summary(loss4, perc)
results.table2 <- data.frame(Percentile, gross1, gross2, gross3, gross4)
kable(results.table2) %>%
  kable_styling(full_width = F)
| Percentile | gross1 | gross2 | gross3 | gross4 |
|---|---|---|---|---|
| 0.25 | 1773280 | 1019100 | 74529.72 | 83517.7 |
| 0.5 | 8360132 | 5483970 | 600852.80 | 428597.6 |
| 0.75 | 32746798 | 22139960 | 3137050.00 | 1734636.5 |
| 0.8 | 46909268 | 30821064 | 4602860.87 | 2439286.1 |
| 0.85 | 70005650 | 45662599 | 6952049.40 | 3690976.2 |
| 0.9 | 125538099 | 77121843 | 12422503.49 | 6287813.5 |
| 0.95 | 302489888 | 172377322 | 30382419.13 | 13516231.4 |
| 0.98 | 843985864 | 480661320 | 76733338.24 | 39050930.3 |
| 0.99 | 2003026188 | 1023430145 | 158750236.36 | 74531533.8 |
| 0.995 | 4972522095 | 2013416270 | 287780740.88 | 138234940.7 |
| 0.999 | 27674584845 | 11762993830 | 1151862498.42 | 867045607.5 |
| Mean | 558710983 | 95374114 | 14223755.12 | 6267264.7 |
| SD | 39654558069 | 1467784285 | 278623079.60 | 76366550.0 |
| Max | 3960776747798 | 69502120999 | 23841760582.85 | 4617336066.4 |
# Re-simulate per-region annual losses using the climate-adjusted
# frequency mean taken from cell [2, 4] of each region's RAF table
# (which scenario/year that cell is depends on the emissions layout —
# TODO confirm). sev_agg accumulates the across-region total.
sev_agg <- numeric(n_iterations)
for (reg in 1:6) {
  nbin_fit <- get(paste0("nbin", reg))
  gamm_fit <- get(paste0("gamm", reg))
  raf_mu <- get(paste0("freq_RAF", reg))[2, 4]
  draws <- numeric(n_iterations)
  for (j in seq_len(n_iterations)) {
    n_events <- rnbinom(1, size = nbin_fit$estimate[[1]], mu = raf_mu)
    draws[j] <- sum(exp(rgamma(n_events, shape = gamm_fit$estimate[[1]],
                               rate = gamm_fit$estimate[[2]])))
  }
  assign(paste0("sev", reg), draws)
  sev_agg <- sev_agg + draws
}
# Wrap the scenario-adjusted losses (and aggregate) in data frames.
for (reg in 1:6) {
  sev_wrap <- data.frame(get(paste0("sev", reg)))
  names(sev_wrap) <- paste0("sev", reg)
  assign(paste0("sev", reg, "_df"), sev_wrap)
}
agg_df <- data.frame(sev_agg)
# VaR-style summary of the scenario-adjusted per-region losses.
perc <- c(0.25, 0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 0.98, 0.99, 0.995, 0.999)
Percentile <- c(perc, "Mean", "SD", "Max")
# Quantiles plus summary moments for one vector of simulated losses.
loss_summary <- function(x, probs) {
  c(quantile(x, probs = probs), mean(x), sd(x), max(x))
}
gross1 <- loss_summary(sev1, perc)
gross2 <- loss_summary(sev2, perc)
gross3 <- loss_summary(sev3, perc)
gross4 <- loss_summary(sev4, perc)
gross5 <- loss_summary(sev5, perc)
gross6 <- loss_summary(sev6, perc)
results.table3 <- data.frame(Percentile, gross1, gross2, gross3, gross4, gross5, gross6)
kable(results.table3) %>%
  kable_styling(full_width = F)
| Percentile | gross1 | gross2 | gross3 | gross4 | gross5 | gross6 |
|---|---|---|---|---|---|---|
| 0.25 | 2253827 | 2027259 | 1082091 | 841328.6 | 475780.7 | 283785.4 |
| 0.5 | 9871308 | 10154043 | 8430743 | 3914267.2 | 2599826.3 | 1711185.4 |
| 0.75 | 39292323 | 40692292 | 41054365 | 15310939.5 | 10362078.5 | 8688070.7 |
| 0.8 | 55428815 | 56744851 | 58447877 | 20933642.9 | 14602333.8 | 13313130.0 |
| 0.85 | 81825775 | 81338836 | 92664312 | 31407893.0 | 22378339.4 | 22406803.2 |
| 0.9 | 140583587 | 132491259 | 166252708 | 54261843.8 | 38806285.9 | 41667822.9 |
| 0.95 | 324909592 | 277850366 | 399773690 | 120193097.4 | 92573237.3 | 119270332.0 |
| 0.98 | 1062707651 | 708535351 | 1281936291 | 357693151.0 | 251844867.1 | 438675650.5 |
| 0.99 | 2514425435 | 1554361731 | 2636870598 | 686498785.9 | 560331425.0 | 1147060237.3 |
| 0.995 | 4907374494 | 3551831203 | 4658265143 | 1535520349.1 | 1434040640.1 | 2740432992.3 |
| 0.999 | 26336456382 | 13755101965 | 19262003665 | 7942801272.7 | 10588742218.8 | 15540346011.2 |
| Mean | 267774620 | 183964977 | 322341756 | 105824411.7 | 149074479.4 | 1361449511.9 |
| SD | 8503374319 | 5102168758 | 16347868942 | 4541330211.3 | 8513156508.6 | 123506949096.5 |
| Max | 778764582297 | 475226499381 | 1624585172773 | 428792212121.5 | 843691653666.6 | 12344106818691.6 |
# Program losses under the climate-adjusted frequency means, for the four
# relocating regions, with fresh cost-uplift draws per iteration.
mat_increase <- runif(n_iterations, 0, 0.5)   # material/labour cost uplift
hh_cost <- runif(n_iterations, 0.4, 0.75)     # household-goods replacement factor
loss_agg <- numeric(n_iterations)
for (reg in 1:4) {
  nbin_fit <- get(paste0("nbin", reg))
  gamm_fit <- get(paste0("gamm", reg))
  raf_mu <- get(paste0("freq_RAF", reg))[2, 4]
  losses <- numeric(n_iterations)
  for (j in seq_len(n_iterations)) {
    n_events <- rnbinom(1, size = nbin_fit$estimate[[1]], mu = raf_mu)
    sev_pa <- sum(exp(rgamma(n_events, shape = gamm_fit$estimate[[1]],
                             rate = gamm_fit$estimate[[2]])))
    losses[j] <- sev_pa * (1 + mat_increase[j]) * (1 + hh_cost[j]) * prop_dist[reg, 1]
  }
  assign(paste0("loss", reg), losses)
  loss_agg <- loss_agg + losses
}
# VaR-style summary of the scenario-adjusted program losses.
perc <- c(0.25, 0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 0.98, 0.99, 0.995, 0.999)
Percentile <- c(perc, "Mean", "SD", "Max")
# Quantiles plus summary moments for one vector of simulated losses.
loss_summary <- function(x, probs) {
  c(quantile(x, probs = probs), mean(x), sd(x), max(x))
}
gross1 <- loss_summary(loss1, perc)
gross2 <- loss_summary(loss2, perc)
gross3 <- loss_summary(loss3, perc)
gross4 <- loss_summary(loss4, perc)
results.table4 <- data.frame(Percentile, gross1, gross2, gross3, gross4)
kable(results.table4) %>%
  kable_styling(full_width = F)
| Percentile | gross1 | gross2 | gross3 | gross4 |
|---|---|---|---|---|
| 0.25 | 2335259 | 1342749 | 100769.9 | 112353.7 |
| 0.5 | 10355933 | 7051089 | 758106.0 | 541429.2 |
| 0.75 | 41302714 | 26843289 | 3927277.9 | 2203397.0 |
| 0.8 | 57970835 | 37150681 | 5635489.4 | 3011946.9 |
| 0.85 | 88649842 | 53646685 | 8873599.8 | 4615253.1 |
| 0.9 | 151413958 | 87399398 | 15567404.2 | 7691604.5 |
| 0.95 | 367651779 | 187222273 | 39951542.0 | 17245138.2 |
| 0.98 | 1161887954 | 474111273 | 116524114.7 | 49251027.9 |
| 0.99 | 2547962197 | 922411230 | 272785822.1 | 106019873.7 |
| 0.995 | 5908277721 | 2062009198 | 624060387.7 | 205903684.5 |
| 0.999 | 37035206478 | 5409505917 | 2915694448.3 | 824002405.7 |
| Mean | 424106761 | 63537714 | 21106645.3 | 9693366.6 |
| SD | 19462748071 | 433677206 | 279353331.3 | 218267614.1 |
| Max | 1917033156560 | 22233498667 | 17929860326.9 | 19770385742.4 |